#coding:utf8
import requests
import re
from lxml import etree

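# NOTE: the Cookie header must be copied from the "all reviews" page itself.
# Dianping issues a different cookie for each URL, so paste the latest cookie
# from your browser into the request headers below before running this script.
# TODO: the review section content is not present in the fetched page
#       (request-header issue).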
# Request headers sent with the page fetch.
# NOTE(review): the Cookie value embeds what look like session credentials
# (e.g. the `dper` entry); it has to be refreshed by hand when it stops working.
headers = {
    'Connection': 'keep-alive',
    'Host': 'www.dianping.com',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'
    ),
    # One cookie pair per line; adjacent literals are concatenated into a
    # single "; "-separated header value.
    'Cookie': (
        'fspop=test; '
        'cy=1305; '
        'cye=huaiyang; '
        '_lxsdk=181aa7ad838c8-03fafa0ef11406-26021a51-154ac4-181aa7ad838c8; '
        '_lxsdk_cuid=181aa7ad838c8-03fafa0ef11406-26021a51-154ac4-181aa7ad838c8; '
        '_hc.v=492fd566-e5d1-725b-d6a8-df2ece63c8df.1656422587; '
        's_ViewType=10; '
        'dper=35e3a1c591e245be209e4ebd5830e3c4171c5892b304e9c3b7213e1902b777f0716c0a139036324bf35dae94b160677df284f026b15ff9fcb176396112fd73b1; '
        'll=7fd06e815b796be3df069dec7836c3df; '
        '_lx_utm=utm_source=Baidu&utm_medium=organic; '
        'Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1656422587,1656467024,1656542870; '
        'Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1656543122; '
        '_lxsdk_s=181b1a636f7-7a8-5e0-fee||49'
    ),
}

# Target page: the "review_all" listing of a single shop.
url = 'http://www.dianping.com/shop/H9FNPpCqj1Tu98oD/review_all'

# Fetch the page. The timeout (seconds) keeps the script from blocking forever
# when the server stalls; requests raises requests.exceptions.Timeout instead.
r = requests.get(url=url, headers=headers, timeout=10)
# Only a 200 status is treated as a successful fetch.
if r.status_code == 200:
    print("访问成功")
    # Round-trip through GBK with errors ignored on both sides, which drops
    # every character GBK cannot represent.
    # NOTE(review): presumably added so the text can be printed on a GBK
    # console without UnicodeEncodeError - confirm before removing, because the
    # saved HTML below loses those characters as well.
    text = r.text.encode("gbk", "ignore").decode("gbk", "ignore")
    # Parse the HTML into an lxml element tree so it can be queried with XPath.
    tree = etree.HTML(text)
    # `class` attribute of every <svgmtsi> element inside a truncated review
    # body; these class names identify the font-mapped glyphs.
    svgmtsi_list = tree.xpath(r'//div[@class="main-review"]/div[@class="review-truncated-words"]/svgmtsi/@class')
    print(len(svgmtsi_list))
    # Keep all glyph class names in a plain, independent list.
    class_list = list(svgmtsi_list)
    print(class_list)
    # Save the (GBK-filtered) page source for offline inspection.
    with open('source1.html', 'w', encoding='utf8') as f:
        f.write(text)
else:
    # Any non-200 status is reported as being blocked by anti-scraping.
    print("被反爬了！小夜斗赶紧跑路！")

